library("knitr") # for knitting RMarkdown
library("kableExtra") # for making nice tables
library("janitor") # for cleaning column names
library("greta") # for writing Bayesian models
library("tidybayes") # tidying up results from Bayesian models
library("cowplot") # for making figure panels
library("ggrepel") # for labels in ggplots
library("gganimate") # for animations
library("extraDistr") # additional probability distributions
library("tidyverse") # for wrangling, plotting, etc.
# Use the classic ggplot2 theme with 20pt text as the default for all plots.
theme_set(theme_classic() +
            theme(text = element_text(size = 20)))
# observed coin flips: 1 = success, 0 = failure
data = c(0, 1, 1, 0, 1, 1, 1, 1)
# running number of successes/failures after each flip; the leading 0 makes
# the first entry correspond to the prior (i.e. before any data is observed)
success = cumsum(c(0, data))
failure = cumsum(c(0, 1 - data))
# Plot the Beta(success + 1, failure + 1) density over [0, 1]: the posterior
# for a coin's bias after `success` heads and `failure` tails under a
# uniform Beta(1, 1) prior.
fun.plot_beta = function(success, failure){
  ggplot(data = tibble(x = c(0, 1)),
         mapping = aes(x = x)) +
    stat_function(fun = "dbeta",
                  args = list(shape1 = success + 1, shape2 = failure + 1),
                  geom = "area",
                  color = "black",
                  fill = "lightblue") +
    # use FALSE, not the reassignable alias F
    coord_cartesian(expand = FALSE) +
    scale_x_continuous(breaks = seq(0.25, 0.75, 0.25)) +
    # strip axis decorations so many small panels tile cleanly in a grid
    theme(axis.title = element_blank(),
          axis.text.y = element_blank(),
          axis.ticks.y = element_blank(),
          plot.margin = margin(r = 1, t = 0.5, unit = "cm"))
}
# one Beta-density panel per (success, failure) pair, arranged as a grid
plots = map2(success, failure, fun.plot_beta)
plot_grid(plotlist = plots)
Is the coin biased?
# observed flips: 8 failures (0) followed by 2 successes (1)
data = rep(0:1, c(8, 2))
# three candidate values for the coin's probability of success
theta = c(0.1, 0.5, 0.9)
# prior probability assigned to each candidate value
prior = c(0.25, 0.5, 0.25)
# prior = c(0.1, 0.1, 0.8) # alternative setting of the prior
# prior = c(0.000001, 0.000001, 0.999998) # another prior setting
# likelihood of the observed number of successes under each theta
n_success = sum(data == 1)
likelihood = dbinom(n_success, size = length(data), prob = theta)
# Bayes' rule: posterior is likelihood * prior, renormalized to sum to 1
unnormalized = likelihood * prior
posterior = unnormalized / sum(unnormalized)
# collect the quantities in one data frame (columns auto-named by tibble)
df.coins = tibble(theta, prior, likelihood, posterior)
Visualize the results:
# Plot prior, likelihood, and posterior as stacked facets (one row each).
df.coins %>%
  # long format: one row per (theta, quantity); pivot_longer() supersedes gather()
  pivot_longer(cols = -theta,
               names_to = "index",
               values_to = "value") %>%
  mutate(index = factor(index, levels = c("prior", "likelihood", "posterior")),
         theta = factor(theta, labels = c("p = 0.1", "p = 0.5", "p = 0.9"))) %>%
  ggplot(data = .,
         mapping = aes(x = theta,
                       y = value,
                       fill = index)) +
  geom_bar(stat = "identity",
           color = "black") +
  facet_grid(rows = vars(index),
             switch = "y",
             scales = "free") +
  # draw axis baselines manually so every facet panel gets x and y lines
  annotate("segment", x = -Inf, xend = Inf, y = -Inf, yend = -Inf) +
  annotate("segment", x = -Inf, xend = -Inf, y = -Inf, yend = Inf) +
  theme(legend.position = "none",
        strip.background = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.title.x = element_blank(),
        axis.line = element_blank())
# grid of theta values on which the posterior is evaluated
theta = seq(0, 1, 0.01)
# observed flips: 8 failures, 2 successes
data = rep(0:1, c(8, 2))
# compute the posterior on the grid for three different priors
df.prior_effect = tibble(theta = theta,
                         prior_uniform = dbeta(theta, shape1 = 1, shape2 = 1),
                         prior_normal = dbeta(theta, shape1 = 5, shape2 = 5),
                         prior_biased = dbeta(theta, shape1 = 8, shape2 = 2)) %>%
  # long format: one row per (theta, prior type); pivot_longer() supersedes gather()
  pivot_longer(cols = -theta,
               names_to = "prior_index",
               values_to = "prior") %>%
  mutate(likelihood = dbinom(sum(data == 1),
                             size = length(data),
                             prob = theta)) %>%
  group_by(prior_index) %>%
  # normalize within each prior so the posterior sums to 1 over the grid
  mutate(posterior = likelihood * prior / sum(likelihood * prior)) %>%
  ungroup() %>%
  pivot_longer(cols = -c(theta, prior_index),
               names_to = "index",
               values_to = "value")
# Plot prior, likelihood, and posterior (rows) for each prior setting (columns).
df.prior_effect %>%
  mutate(index = factor(index, levels = c("prior", "likelihood", "posterior")),
         prior_index = factor(prior_index,
                              levels = c("prior_uniform", "prior_normal", "prior_biased"),
                              labels = c("uniform", "symmetric", "asymmetric"))) %>%
  ggplot(data = .,
         mapping = aes(x = theta,
                       y = value,
                       color = index)) +
  geom_line(size = 1) +
  # free scales because prior/likelihood/posterior live on different y scales;
  # switch = "y" moves the row labels to the left side
  facet_grid(cols = vars(prior_index),
             rows = vars(index),
             scales = "free",
             switch = "y") +
  scale_x_continuous(breaks = seq(0, 1, 0.2)) +
  # manually drawn axis baselines so every facet panel gets x and y lines
  annotate("segment", x = -Inf, xend = Inf, y = -Inf, yend = -Inf) +
  annotate("segment", x = -Inf, xend = -Inf, y = -Inf, yend = Inf) +
  theme(legend.position = "none",
        strip.background = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.text.x = element_text(size = 10),
        axis.line = element_blank())
Figure 22.1: Illustration of how the prior affects the posterior.
# grid of theta values on which the posterior is evaluated
theta = seq(0, 1, 0.01)
# posterior on the grid for three different likelihood positions,
# holding the prior fixed at Beta(2, 8)
df.likelihood_effect = tibble(theta = theta,
                              prior = dbeta(theta, shape1 = 2, shape2 = 8),
                              likelihood_left = dbeta(theta, shape1 = 1, shape2 = 9),
                              likelihood_center = dbeta(theta, shape1 = 5, shape2 = 5),
                              likelihood_right = dbeta(theta, shape1 = 9, shape2 = 1)) %>%
  # long format; pivot_longer() supersedes gather()
  pivot_longer(cols = -c(theta, prior),
               names_to = "likelihood_index",
               values_to = "likelihood") %>%
  group_by(likelihood_index) %>%
  # normalize within each likelihood so the posterior sums to 1 over the grid
  mutate(posterior = likelihood * prior / sum(likelihood * prior)) %>%
  ungroup() %>%
  pivot_longer(cols = -c(theta, likelihood_index),
               names_to = "index",
               values_to = "value")
# Plot prior, likelihood, and posterior (rows) for each likelihood position
# (columns: left, center, right).
df.likelihood_effect %>%
  mutate(index = factor(index, levels = c("prior", "likelihood", "posterior")),
         likelihood_index = factor(likelihood_index,
                                   levels = c("likelihood_left", "likelihood_center", "likelihood_right"),
                                   labels = c("left", "center", "right"))) %>%
  ggplot(data = .,
         mapping = aes(x = theta,
                       y = value,
                       color = index)) +
  geom_line(size = 1) +
  facet_grid(cols = vars(likelihood_index),
             rows = vars(index),
             scales = "free",
             switch = "y") +
  scale_x_continuous(breaks = seq(0, 1, 0.2)) +
  # manually drawn axis baselines so every facet panel gets x and y lines
  annotate("segment", x = -Inf, xend = Inf, y = -Inf, yend = -Inf) +
  annotate("segment", x = -Inf, xend = -Inf, y = -Inf, yend = Inf) +
  theme(legend.position = "none",
        strip.background = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.text.x = element_text(size = 10),
        axis.line = element_blank(),
        # hide the column strip labels
        strip.text.x = element_blank())
Figure 22.2: Illustration of how the likelihood of the data affects the posterior.
# grid of theta values on which the posterior is evaluated
theta = seq(0, 1, 0.01)
# posterior on the grid for likelihoods based on increasing amounts of data
# (same success proportion, larger n), holding the prior fixed at Beta(5, 5)
df.sample_size_effect = tibble(theta = theta,
                               prior = dbeta(theta, shape1 = 5, shape2 = 5),
                               likelihood_low = dbeta(theta, shape1 = 2, shape2 = 8),
                               likelihood_medium = dbeta(theta, shape1 = 10, shape2 = 40),
                               likelihood_high = dbeta(theta, shape1 = 20, shape2 = 80)) %>%
  # long format; pivot_longer() supersedes gather()
  pivot_longer(cols = -c(theta, prior),
               names_to = "likelihood_index",
               values_to = "likelihood") %>%
  group_by(likelihood_index) %>%
  # normalize within each likelihood so the posterior sums to 1 over the grid
  mutate(posterior = likelihood * prior / sum(likelihood * prior)) %>%
  ungroup() %>%
  pivot_longer(cols = -c(theta, likelihood_index),
               names_to = "index",
               values_to = "value")
# Plot prior, likelihood, and posterior (rows) for each sample size (columns).
df.sample_size_effect %>%
  mutate(index = factor(index, levels = c("prior", "likelihood", "posterior")),
         likelihood_index = factor(likelihood_index,
                                   levels = c("likelihood_low", "likelihood_medium", "likelihood_high"),
                                   labels = c("n = low", "n = medium", "n = high"))) %>%
  ggplot(data = .,
         mapping = aes(x = theta,
                       y = value,
                       color = index)) +
  geom_line(size = 1) +
  facet_grid(cols = vars(likelihood_index),
             rows = vars(index),
             scales = "free",
             switch = "y") +
  scale_x_continuous(breaks = seq(0, 1, 0.2)) +
  # manually drawn axis baselines so every facet panel gets x and y lines
  annotate("segment", x = -Inf, xend = Inf, y = -Inf, yend = -Inf) +
  annotate("segment", x = -Inf, xend = -Inf, y = -Inf, yend = Inf) +
  theme(legend.position = "none",
        strip.background = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.text.x = element_text(size = 10),
        axis.line = element_blank())
# Compare the standard normal density (blue) with the heavier-tailed
# student-t density with df = 1 (red).
ggplot(data = tibble(x = c(-5, 5)),
       mapping = aes(x = x)) +
  stat_function(fun = "dnorm",
                color = "blue",
                size = 1) +
  stat_function(fun = "dt",
                args = list(df = 1),
                color = "red",
                size = 1)
Figure 22.3: Comparison between the normal distribution and the student-t distribution.
# Draw the Beta(shape1, shape2) density on [0, 1], with the distribution's
# name annotated at the top of the panel.
fun.draw_beta = function(shape1, shape2){
  distribution_label = str_c("Beta(", shape1, ",", shape2, ")")
  ggplot(data = tibble(x = c(0, 1)),
         aes(x = x)) +
    stat_function(fun = "dbeta",
                  args = list(shape1 = shape1, shape2 = shape2),
                  color = "black",
                  size = 1) +
    annotate(geom = "text",
             label = distribution_label,
             x = 0.5,
             y = Inf,
             hjust = 0.5,
             vjust = 1.1,
             size = 4) +
    scale_x_continuous(breaks = seq(0, 1, 0.2)) +
    theme(axis.title.x = element_blank())
}
# parameter settings showcasing different Beta shapes
shape1 = c(1, 0.5, 5, 1, 8, 20)
shape2 = c(1, 0.5, 5, 9, 2, 20)
# one panel per (shape1, shape2) pair, arranged as a grid
p.list = map2(.x = shape1, .y = shape2, .f = fun.draw_beta)
plot_grid(plotlist = p.list)
Figure 22.4: Beta distributions with different parameter settings.
# Two normal densities with different spread: sd = 1 (blue) vs sd = 5 (red).
ggplot(data = tibble(x = c(-10, 10)),
       mapping = aes(x = x)) +
  stat_function(fun = "dnorm",
                args = list(sd = 1),
                color = "blue",
                size = 1) +
  stat_function(fun = "dnorm",
                args = list(sd = 5),
                color = "red",
                size = 1)
Figure 22.5: Normal distributions with different standard deviation.
# Cauchy(0, 1) restricted to x in [0, 10] (blue) vs Gamma(shape = 4,
# rate = 2) (red).
ggplot(data = tibble(x = c(0, 10)),
       mapping = aes(x = x)) +
  stat_function(fun = "dcauchy",
                args = list(location = 0, scale = 1),
                color = "blue",
                size = 1,
                xlim = c(0, 10)) +
  stat_function(fun = "dgamma",
                args = list(shape = 4, rate = 2),
                color = "red",
                size = 1)
Figure 22.6: Cauchy and Gamma distribution.
Example of how we can compute probabilities based on random samples generated from a distribution.
# Draw samples from Normal(mean = 1, sd = 2), then compare their empirical
# density (red line) against the theoretical density (dashed black line).
# NOTE(review): no set.seed(), so the sample differs on each run.
df.samples = tibble(x = rnorm(10000, mean = 1, sd = 2))
ggplot(data = df.samples,
       mapping = aes(x = x)) +
  stat_density(geom = "line",
               size = 2,
               color = "red") +
  stat_function(fun = "dnorm",
                args = list(mean = 1, sd = 2),
                linetype = 2,
                color = "black")
# P(0 <= x < 4) estimated from the samples: the mean of a logical
# vector is the proportion of TRUE values
df.samples %>%
  summarize(prob = mean(x >= 0 & x < 4))
# the same probability from the theoretical distribution
pnorm(4, mean = 1, sd = 2) - pnorm(0, mean = 1, sd = 2)
# A tibble: 1 x 1
prob
<dbl>
1 0.626
[1] 0.6246553
You can find out more about how to get started with “greta” here: https://greta-stats.org/articles/get_started.html. Make sure to install the development version of “greta” (as shown in the “install-packages” code chunk above: devtools::install_github("greta-dev/greta")).
# load the built-in attitude data set (from the datasets package)
df.attitude = datasets::attitude
Visualize relationship between how well complaints are handled and the overall rating of an employee
# scatter plot of overall rating against complaint handling
df.attitude %>%
  ggplot(mapping = aes(x = complaints,
                       y = rating)) +
  geom_point()
# fit a simple linear regression of rating on complaints
fit = lm(formula = rating ~ 1 + complaints,
         data = df.attitude)
# print the model summary
summary(fit)
Call:
lm(formula = rating ~ 1 + complaints, data = df.attitude)
Residuals:
Min 1Q Median 3Q Max
-12.8799 -5.9905 0.1783 6.2978 9.6294
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 14.37632 6.61999 2.172 0.0385 *
complaints 0.75461 0.09753 7.737 1.99e-08 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 6.993 on 28 degrees of freedom
Multiple R-squared: 0.6813, Adjusted R-squared: 0.6699
F-statistic: 59.86 on 1 and 28 DF, p-value: 1.988e-08
Visualize the model’s predictions
# scatter plot with the fitted regression line overlaid
df.attitude %>%
  ggplot(mapping = aes(x = complaints,
                       y = rating)) +
  geom_smooth(method = "lm",
              color = "black") +
  geom_point()
# variables & priors for the Bayesian linear regression (greta)
b0 = normal(0, 10) # intercept
b1 = normal(0, 10) # slope for complaints
# residual standard deviation: Cauchy truncated to [0, Inf)
# since a standard deviation must be non-negative
sd = cauchy(0, 3, truncation = c(0, Inf))
# linear predictor
mu = b0 + b1 * df.attitude$complaints
# observation model (likelihood): ratings are normal around mu
distribution(df.attitude$rating) = normal(mu, sd)
# define the model over the parameters we want posteriors for
m = model(b0, b1, sd)
Visualize the model as graph:
# plotting: visualize the model's structure as a graph
plot(m)
Draw samples from the posterior distribution:
# sampling: draw 1000 MCMC samples from the posterior
draws = mcmc(m, n_samples = 1000)
# tidy up the draws into a data frame (one row per sample);
# clean_names() normalizes the column names
df.draws = tidy_draws(draws) %>%
clean_names()
These are the priors I used for the intercept, regression weights, and the standard deviation of the Gaussian likelihood function:
# Gaussian prior used for the intercept and slope: Normal(0, 10)
ggplot(data = tibble(x = c(-30, 30)),
       mapping = aes(x = x)) +
  stat_function(fun = "dnorm",
                args = list(sd = 10),
                size = 2)
# Cauchy prior used for the residual sd, shown on x >= 0
ggplot(data = tibble(x = c(0, 30)),
       mapping = aes(x = x)) +
  stat_function(fun = "dcauchy",
                args = list(location = 0,
                            scale = 3),
                size = 2)
This is what the posterior looks like for the three parameters in the model:
# Posterior densities for the model parameters, one facet per parameter.
df.draws %>%
  # columns from draw through sd — presumably draw index plus b0, b1, sd;
  # verify against tidy_draws()/clean_names() output
  select(draw:sd) %>%
  gather("index", "value", -draw) %>%
  ggplot(data = .,
         mapping = aes(x = value)) +
  stat_density(geom = "line") +
  facet_grid(rows = vars(index),
             scales = "free_y",
             switch = "y") +
  # manually drawn axis baselines so every facet panel gets x and y lines
  annotate("segment", x = -Inf, xend = Inf, y = -Inf, yend = -Inf) +
  annotate("segment", x = -Inf, xend = -Inf, y = -Inf, yend = Inf) +
  theme(legend.position = "none",
        strip.background = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.text.x = element_text(size = 10),
        axis.line = element_blank(),
        # NOTE(review): there are no column facets here, so this
        # strip.text.x setting looks like a copy-paste leftover (harmless)
        strip.text.x = element_blank())
Let’s take some samples from the posterior to visualize the model predictions:
# Overlay the regression lines implied by 50 posterior samples on the data.
ggplot(data = df.attitude,
       mapping = aes(x = complaints,
                     y = rating)) +
  geom_abline(data = df.draws %>%
                sample_n(size = 50), # 50 random posterior draws (not seeded)
              aes(intercept = b0,
                  slope = b1),
              alpha = 0.3,
              color = "lightblue") +
  geom_point()
Let’s make an animation that illustrates what predicted data sets (based on samples from the posterior) would look like:
# Posterior predictive animation: for each of 10 posterior draws, simulate
# a data set from the model and show it next to the observed data.
p = df.draws %>%
  sample_n(size = 10) %>%
  # evenly spaced grid of predictor values, one list-column copy per draw
  mutate(complaints = list(seq(min(df.attitude$complaints),
                               max(df.attitude$complaints),
                               length.out = nrow(df.attitude)))) %>%
  unnest(complaints) %>%
  # simulate ratings: linear predictor plus Gaussian noise using the drawn sd
  mutate(prediction = b0 + b1 * complaints + rnorm(n(), sd = sd)) %>%
  ggplot(aes(x = complaints, y = prediction)) +
  geom_point(alpha = 0.8,
             color = "lightblue") +
  # observed data in black for comparison
  geom_point(data = df.attitude,
             aes(y = rating,
                 x = complaints)) +
  coord_cartesian(xlim = c(20, 100),
                  ylim = c(20, 100)) +
  # one animation frame per posterior draw
  transition_manual(draw)
animate(p, nframes = 60, width = 800, height = 600, res = 96, type = "cairo")
# anim_save("posterior_predictive.gif")
And let’s illustrate what data we would have expected to see just based on the information that we encoded in our priors.
# Prior predictive animation: simulate data sets with parameters drawn from
# the priors (not the posterior) to see what the priors alone imply.
sample_size = 10
p = tibble(
  b0 = rnorm(sample_size, mean = 0, sd = 10), # intercept prior
  b1 = rnorm(sample_size, mean = 0, sd = 10), # slope prior
  sd = rhcauchy(sample_size, sigma = 3), # half-Cauchy prior (extraDistr)
  draw = 1:sample_size
) %>%
  # random predictor values spanning the observed range, one set per draw
  mutate(complaints = list(runif(nrow(df.attitude),
                                 min = min(df.attitude$complaints),
                                 max = max(df.attitude$complaints)))) %>%
  unnest(complaints) %>%
  # simulate ratings: linear predictor plus Gaussian noise
  mutate(prediction = b0 + b1 * complaints + rnorm(n(), sd = sd)) %>%
  ggplot(aes(x = complaints, y = prediction)) +
  geom_point(alpha = 0.8,
             color = "lightblue") +
  # observed data in black for comparison
  geom_point(data = df.attitude,
             aes(y = rating,
                 x = complaints)) +
  # one animation frame per prior draw
  transition_manual(draw)
animate(p, nframes = 60, width = 800, height = 600, res = 96, type = "cairo")
# anim_save("prior_predictive.gif")
Information about this R session including which version of R was used, and what packages were loaded.
sessionInfo()
R version 3.5.2 (2018-12-20)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS Mojave 10.14.3
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] forcats_0.4.0 stringr_1.4.0 dplyr_0.8.0.1
[4] purrr_0.3.1 readr_1.3.1 tidyr_0.8.3
[7] tibble_2.0.1 tidyverse_1.2.1 extraDistr_1.8.10
[10] gganimate_1.0.2 ggrepel_0.8.0 cowplot_0.9.4
[13] ggplot2_3.1.0 tidybayes_1.0.4 greta_0.3.0.9001
[16] janitor_1.1.1 kableExtra_1.0.1 knitr_1.22
[19] styler_1.1.0.9000
loaded via a namespace (and not attached):
[1] nlme_3.1-137 lubridate_1.7.4
[3] webshot_0.5.1 RColorBrewer_1.1-2
[5] progress_1.2.0.9000 httr_1.4.0
[7] tools_3.5.2 backports_1.1.3
[9] utf8_1.1.4 R6_2.4.0
[11] lazyeval_0.2.1 colorspace_1.4-0
[13] withr_2.1.2 gridExtra_2.3
[15] tidyselect_0.2.5 prettyunits_1.0.2
[17] compiler_3.5.2 cli_1.0.1
[19] rvest_0.3.2 arrayhelpers_1.0-20160527
[21] xml2_1.2.0 influenceR_0.1.0
[23] labeling_0.3 bookdown_0.9
[25] scales_1.0.0 tfruns_1.4
[27] digest_0.6.18 rmarkdown_1.11
[29] base64enc_0.1-3 pkgconfig_2.0.2
[31] htmltools_0.3.6 highr_0.7
[33] htmlwidgets_1.3 rlang_0.3.1
[35] readxl_1.3.0 rstudioapi_0.9.0
[37] visNetwork_2.0.5 farver_1.1.0
[39] generics_0.0.2 svUnit_0.7-12
[41] jsonlite_1.6 tensorflow_1.10
[43] rgexf_0.15.3 magrittr_1.5
[45] Matrix_1.2-15 Rcpp_1.0.0
[47] munsell_0.5.0 fansi_0.4.0
[49] viridis_0.5.1 reticulate_1.11.1
[51] stringi_1.3.1 whisker_0.3-2
[53] yaml_2.2.0 plyr_1.8.4
[55] ggstance_0.3.1 grid_3.5.2
[57] parallel_3.5.2 listenv_0.7.0
[59] crayon_1.3.4 lattice_0.20-38
[61] haven_2.1.0 hms_0.4.2
[63] pillar_1.3.1 igraph_1.2.4
[65] reshape2_1.4.3 codetools_0.2-16
[67] XML_3.98-1.19 glue_1.3.1
[69] evaluate_0.13 downloader_0.4
[71] gifski_0.8.6 modelr_0.1.4
[73] png_0.1-7 tweenr_1.0.1
[75] cellranger_1.1.0 gtable_0.2.0
[77] future_1.12.0 assertthat_0.2.0
[79] xfun_0.5 broom_0.5.1
[81] coda_0.19-2 viridisLite_0.3.0
[83] Rook_1.1-1 DiagrammeR_1.0.0
[85] globals_0.12.4 brew_1.0-6